# 爬取控制与决策期刊论文

import requests
from bs4 import BeautifulSoup
# Landing (home) page of the journal "Control and Decision" (控制与决策).
url = "http://kzyjc.alljournals.cn/kzyjc/home"
def paperScr(url, selector, flag):
    """Download *url* and extract the elements matching a CSS selector.

    Parameters
    ----------
    url : str
        Page to fetch.
    selector : str
        CSS selector passed to ``BeautifulSoup.select``.
    flag : int
        0 -> return each matched element's text;
        1 -> return each matched element's ``href`` attribute.

    Returns
    -------
    list[str]
        Extracted texts or hrefs. With ``flag == 1``, elements that carry
        no ``href`` attribute are skipped instead of raising ``KeyError``.

    Raises
    ------
    ValueError
        If *flag* is neither 0 nor 1 (the original silently returned None).
    requests.exceptions.HTTPError
        If the server responds with a 4xx/5xx status.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.5211 SLBChan/105'
    }
    # timeout= keeps the script from hanging forever on an unresponsive host.
    r = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of scraping an error page.
    r.raise_for_status()
    bs = BeautifulSoup(r.text, "html.parser")
    if flag == 0:
        return [item.text for item in bs.select(selector)]
    if flag == 1:
        # Tag.get() returns None for anchors without an href; skip those.
        return [item['href'] for item in bs.select(selector)
                if item.get('href') is not None]
    raise ValueError(f"flag must be 0 or 1, got {flag!r}")

# CSS selectors for the journal's listing page.
TITLE_SELECTOR = "div.title a"   # paper-title anchors (also carry the detail-page href)
AUTHOR_SELECTOR = "p.zz span a"  # author-name anchors

# Scrape the home page: titles and authors as text, plus each paper's
# relative detail-page link taken from the title anchors' href attributes.
paperTitle = paperScr(url, TITLE_SELECTOR, 0)
paperAuthor = paperScr(url, AUTHOR_SELECTOR, 0)
paperUrl = paperScr(url, TITLE_SELECTOR, 1)

# Per-paper details, gathered by visiting each paper's detail page.
# Each entry is the list paperScr() returns for that page's selector.
abstracts, pdfurl, Enabstracts = [], [], []

# Loop variable renamed from ``url`` so it no longer shadows the
# module-level home-page URL constant.
for rel_link in paperUrl:
    FullUrl = "http://kzyjc.alljournals.cn/" + rel_link
    abstracts.append(paperScr(FullUrl, "#CnAbstractValue", 0))    # Chinese abstract
    Enabstracts.append(paperScr(FullUrl, "#EnAbstractValue", 0))  # English abstract
    pdfurl.append(paperScr(FullUrl, "#PdfUrl", flag=1))           # PDF download link
print("论文标题:", paperTitle)
print("论文作者：", paperAuthor)
print("论文中文摘要：", abstracts)
print("论文英文摘要：", Enabstracts)
print("论文链接：", pdfurl)
